In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
import plotly.graph_objects as go
In [2]:
# Load the accident records; the relative path "data.csv" is resolved against
# the kernel's current working directory.
data = pd.read_csv("data.csv")
In [3]:
# Preview the first five rows (five is the default for head()).
data.head()
Out[3]:
ID Source TMC Severity Start_Time End_Time Start_Lat Start_Lng End_Lat End_Lng ... Roundabout Station Stop Traffic_Calming Traffic_Signal Turning_Loop Sunrise_Sunset Civil_Twilight Nautical_Twilight Astronomical_Twilight
0 A-1 MapQuest 201.0 3 2016-02-08 05:46:00 2016-02-08 11:00:00 39.865147 -84.058723 NaN NaN ... False False False False False False Night Night Night Night
1 A-2 MapQuest 201.0 2 2016-02-08 06:07:59 2016-02-08 06:37:59 39.928059 -82.831184 NaN NaN ... False False False False False False Night Night Night Day
2 A-3 MapQuest 201.0 2 2016-02-08 06:49:27 2016-02-08 07:19:27 39.063148 -84.032608 NaN NaN ... False False False False True False Night Night Day Day
3 A-4 MapQuest 201.0 3 2016-02-08 07:23:34 2016-02-08 07:53:34 39.747753 -84.205582 NaN NaN ... False False False False False False Night Day Day Day
4 A-5 MapQuest 201.0 2 2016-02-08 07:39:07 2016-02-08 08:09:07 39.627781 -84.188354 NaN NaN ... False False False False True False Day Day Day Day

5 rows × 49 columns

In [4]:
# Bounding box of all accident start points, ordered as
# (min longitude, max longitude, min latitude, max latitude).
BBox = tuple(
    bound
    for column in ("Start_Lng", "Start_Lat")
    for bound in (data[column].min(), data[column].max())
)
BBox
Out[4]:
(-124.623833, -67.113167, 24.570222, 49.000759)

The dataset contains 2.2 million rows. We definitely will not need all of them, so we will take a sample.

In [5]:
# Columns needed by the map figures below.
SAMPLE_COLUMNS = ['Start_Lng', 'Start_Lat', 'City', 'Visibility(mi)', 'Severity']
# Fixed seed so a fresh "Restart Kernel -> Run All" draws the same rows and
# therefore reproduces the same figures.
SAMPLE_SEED = 42

# Select the columns first so only the needed ones are copied, and cap the
# sample size so a dataset with fewer than 3000 rows does not raise.
data_sample = data[SAMPLE_COLUMNS].sample(
    n=min(3000, len(data)),
    random_state=SAMPLE_SEED,
)
In [6]:
# Pull the plotted columns out of the sample as separate Series:
# longitude, latitude, hover text (city) and severity.
LON, LAT, TEXT, SEVERITY = (
    data_sample[column]
    for column in ('Start_Lng', 'Start_Lat', 'City', 'Severity')
)
In [7]:
# --- Map geometry shared by the plotly figures ---
GEO_SCOPE = 'usa'
PROJECTION = 'albers usa'
LOCATION_MODE = 'USA-states'

# --- Trace appearance ---
MODE = 'markers'
COLORS = 'Blues'

# --- Map colours: land fill, state (subunit) borders, country border ---
LAND = 'rgb(255, 255, 255)'
UNIT = 'rgb(0, 0, 0)'
COUNTRY = 'rgb(0, 0, 0)'

Simple attempt

In [8]:
# Plain scatter of every accident: longitude on x, latitude on y.
# alpha=0.4 lets overlapping points show up as darker areas.
data.plot.scatter(x="Start_Lng", y="Start_Lat", alpha=0.4, figsize=(25, 15))
plt.show()

Not very informative, is it?

Let's divide the accidents by state and draw some kind of heat map.

In [9]:
# Count accidents per state once and reuse the result for both the locations
# and the values. Series.value_counts() replaces the deprecated top-level
# pd.value_counts() and avoids scanning the full dataset twice.
state_counts = data['State'].value_counts()

fig = go.Figure(
    data=go.Choropleth(
        locations=state_counts.index,
        z=state_counts.astype(float),
        locationmode=LOCATION_MODE,
        colorscale=COLORS,
        colorbar_title="Accidents",
    )
)

fig.update_layout(title_text='Accidents Heat-map', geo_scope=GEO_SCOPE)

fig.show()
In [10]:
# Plot the sampled accidents (LON / LAT / TEXT / SEVERITY come from
# `data_sample`) on a US map, coloured by severity.
# The former `sample_data = data.sample(n=1000)` was never plotted and has
# been removed.
fig = go.Figure(
    data=go.Scattergeo(
        locationmode=LOCATION_MODE,
        lon=LON,
        lat=LAT,
        text=TEXT,
        mode=MODE,
        marker=dict(
            size=8,
            opacity=0.8,
            reversescale=True,
            autocolorscale=False,
            symbol='circle',
            # Written as rgb(): the rgba() form expects a fourth (alpha)
            # component, which the original string did not provide.
            line=dict(width=1, color='rgb(0, 0, 0)'),
            colorscale=COLORS,
            # The colour domain spans the observed severity range.
            cmin=SEVERITY.min(),
            color=SEVERITY,
            cmax=SEVERITY.max(),
            colorbar_title="Severity",
        ),
    )
)

fig.update_layout(
    title='Severity of accidents',
    geo=dict(
        scope=GEO_SCOPE,
        projection_type=PROJECTION,
        showland=True,
        landcolor=LAND,
        subunitcolor=UNIT,
        countrycolor=COUNTRY,
        countrywidth=3,
        subunitwidth=3,
    ),
)
fig.show()
In [11]:
# County table (LAUS 2016) hosted in the plotly datasets repository; it is
# used below for its state and county FIPS codes.
county_csv_url = 'https://raw.githubusercontent.com/plotly/datasets/master/laucnty16.csv'
df_county = pd.read_csv(county_csv_url)
df_county.head(n=3)
Out[11]:
LAUS Code State FIPS Code County FIPS Code County Name/State Abbreviation Year Labor Force Employed Unemployed Unemployment Rate (%)
0 CN0100100000000 1 1 Autauga County, AL 2016 25,649 24,297 1,352 5.3
1 CN0100300000000 1 3 Baldwin County, AL 2016 89,931 85,061 4,870 5.4
2 CN0100500000000 1 5 Barbour County, AL 2016 8,302 7,584 718 8.6
In [12]:
# Split labels such as "Autauga County, AL" into the county label and the
# state abbreviation. The state is needed because county names repeat across
# states; matching on name alone would give every same-named county the
# nationwide total for that name.
name_parts = df_county['County Name/State Abbreviation'].str.split(', ', n=1, expand=True)
df_county['county_full'] = name_parts[0]
df_county['county_state'] = name_parts[1].str.strip() if name_parts.shape[1] > 1 else None
df_county['county_name'] = df_county['county_full'].str.split(' County').str[0]

# NOTE(review): the District of Columbia presumably appears without a ", DC"
# suffix in this table - confirm. Give it its state code so it can still match.
is_dc = df_county['county_full'].eq('District of Columbia') & df_county['county_state'].isna()
df_county.loc[is_dc, 'county_state'] = 'DC'

# Accidents per (state, county). The count column is explicitly named
# 'County' because the plotting cell reads the counts from
# fips_county_df['County']; naming it here avoids depending on how
# value_counts().to_frame() labels its column in a given pandas version.
accident_counts = (
    data.groupby(['State', 'County'])
    .size()
    .rename_axis(['county_state', 'county_name'])
    .rename('County')
    .reset_index()
)

fips_county_df = df_county[
    ['county_name', 'county_state', 'County FIPS Code', 'State FIPS Code']
].merge(accident_counts, on=['county_name', 'county_state'])
In [13]:
import plotly.figure_factory as ff

# Build the combined FIPS key: state code zero-padded to 2 characters followed
# by the county code zero-padded to 3 characters.
fips_county_df['State FIPS Code'] = fips_county_df['State FIPS Code'].astype(str).str.zfill(2)
fips_county_df['County FIPS Code'] = fips_county_df['County FIPS Code'].astype(str).str.zfill(3)
fips_county_df['FIPS'] = fips_county_df['State FIPS Code'] + fips_county_df['County FIPS Code']

# Light-to-dark blue ramp, one colour per bin.
colorscale = [
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
    "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be",
    "#2171b5", "#1361a9", "#08519c", "#0b4083", "#08306b",
]
# Evenly spaced bin edges from 1 to 30000, one fewer than the number of colours.
bin_count = len(colorscale) - 1
endpts = list(np.linspace(1, 30000, bin_count))

fips = fips_county_df['FIPS'].tolist()
values = fips_county_df['County'].tolist()

choropleth_options = dict(
    scope=[GEO_SCOPE],
    binning_endpoints=endpts,
    colorscale=colorscale,
    show_state_data=False,
    show_hover=True,
    asp=2.9,
    title_text='USA County accidents count',
    legend_title='Accidents count',
)
fig = ff.create_choropleth(fips=fips, values=values, **choropleth_options)
fig.layout.template = None
fig.show()
In [14]:
# Drop incomplete rows by reassignment rather than in place, so the cell is
# idempotent on re-run.
data_sample = data_sample.dropna()

# Take every plotted column from the *cleaned* frame. The module-level
# LON / LAT / TEXT / SEVERITY Series were extracted before the dropna and
# still contain the dropped rows, so combining them with the cleaned
# visibility column would pair marker sizes with the wrong points.
clean_severity = data_sample['Severity']

fig = go.Figure(
    data=go.Scattergeo(
        locationmode=LOCATION_MODE,
        lon=data_sample['Start_Lng'],
        lat=data_sample['Start_Lat'],
        text=data_sample['City'],
        mode=MODE,
        marker=dict(
            # Marker size encodes visibility in miles.
            size=data_sample['Visibility(mi)'],
            opacity=0.8,
            reversescale=True,
            autocolorscale=False,
            symbol='circle',
            # Written as rgb(): the rgba() form expects a fourth (alpha)
            # component, which the original string did not provide.
            line=dict(width=1, color='rgb(102, 102, 102)'),
            colorscale=COLORS,
            # cmin is the lower bound and cmax the upper bound of the colour
            # domain, matching the earlier severity map.
            cmin=clean_severity.min(),
            color=clean_severity,
            cmax=clean_severity.max(),
            colorbar_title="Severity",
        ),
    )
)

fig.update_layout(
    title='Severity & Visibility of accidents',
    geo=dict(
        scope=GEO_SCOPE,
        projection_type=PROJECTION,
        showland=True,
        landcolor=LAND,
        subunitcolor=UNIT,
        countrycolor=COUNTRY,
        countrywidth=0.7,
        subunitwidth=0.7,
    ),
)
fig.show()